Follow the article, “How to Run Python’s Scikit-Learn in R in 5 minutes”
Install the Anaconda Distribution
Get Python Scikit Learn Setup in R
Create a “py3.8” conda environment containing scikit-learn, numpy, pandas, and matplotlib.
# reticulate bridges R and Python; scikit-learn will run through it below.
library(reticulate)
# Replace this with your conda environment containing sklearn, pandas, & numpy
use_condaenv("py3.8", required = TRUE)
# tidyverse for wrangling/plotting; lubridate for the date parsing used later.
library(tidyverse)
library(lubridate)
# Read the raw HR dataset (one row per employee).
hr_data_tbl <- read_csv("data/HRDataset_v13.csv")
# Print to inspect columns and types.
hr_data_tbl
## # A tibble: 401 x 35
## Employee_Name EmpID MarriedID MaritalStatusID GenderID EmpStatusID DeptID
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Brown, Mia 1.10e9 1 1 0 1 1
## 2 LaRotonda, W… 1.11e9 0 2 1 1 1
## 3 Steans, Tyro… 1.30e9 0 0 1 1 1
## 4 Howard, Este… 1.21e9 1 1 0 1 1
## 5 Singh, Nan 1.31e9 0 0 0 1 1
## 6 Smith, Leigh… 7.11e8 1 1 0 5 1
## 7 Bunbury, Jes… 1.50e9 1 1 0 5 6
## 8 Carter, Mich… 1.40e9 0 0 0 1 6
## 9 Dietrich, Je… 1.41e9 0 0 0 1 6
## 10 Digitale, Al… 1.31e9 1 1 1 1 6
## # … with 391 more rows, and 28 more variables: PerfScoreID <dbl>,
## # FromDiversityJobFairID <dbl>, PayRate <dbl>, Termd <dbl>, PositionID <dbl>,
## # Position <chr>, State <chr>, Zip <chr>, DOB <chr>, Sex <chr>,
## # MaritalDesc <chr>, CitizenDesc <chr>, HispanicLatino <chr>, RaceDesc <chr>,
## # DateofHire <chr>, DateofTermination <chr>, TermReason <chr>,
## # EmploymentStatus <chr>, Department <chr>, ManagerName <chr>,
## # ManagerID <dbl>, RecruitmentSource <chr>, PerformanceScore <chr>,
## # EngagementSurvey <dbl>, EmpSatisfaction <dbl>, SpecialProjectsCount <dbl>,
## # LastPerformanceReview_Date <chr>, DaysLateLast30 <dbl>
# Quick structure overview of all columns.
hr_data_tbl %>% glimpse()
# Feature engineering: convert date strings into numeric "relative years"
# features (years elapsed since the earliest date observed in the column),
# so downstream normalization/clustering can consume them.
hr_data_tbl <- hr_data_tbl %>%
# AgeRel: years between this employee's DOB and the earliest DOB in the data.
# NOTE(review): rows with AgeRel near 95 in the output below suggest mdy()
# is parsing some 2-digit years into the wrong century — verify DOB parsing.
mutate(AgeRel = ( mdy(DOB) - min(mdy(DOB), na.rm = TRUE) ) / dyears(1)) %>%
# TenureRel: years between this hire date and the earliest hire date.
mutate(TenureRel = ( mdy(DateofHire) - min(mdy(DateofHire), na.rm = TRUE) ) / dyears(1))
# Inspect the two engineered features.
hr_data_tbl %>% select(AgeRel, TenureRel)
## # A tibble: 401 x 2
## AgeRel TenureRel
## <dbl> <dbl>
## 1 18.8 2.80
## 2 15.2 7.99
## 3 17.6 8.72
## 4 16.6 9.10
## 5 19.3 9.31
## 6 18.3 5.71
## 7 95.3 5.60
## 8 94.3 8.61
## 9 18.3 6.11
## 10 19.6 8.61
## # … with 391 more rows
# Distribution of the engineered AgeRel feature.
hr_data_tbl %>%
ggplot(aes(AgeRel)) +
geom_histogram()
# DataExplorer: percent-missing per column, to decide which columns are
# usable for clustering without heavy imputation.
library(DataExplorer)
hr_data_tbl %>% plot_missing()
# Columns kept for clustering: the identifier plus a small mix of
# demographic and compensation features.
selections <- c(
"Employee_Name",
"Sex", "MaritalDesc",
"PayRate", "Department",
"AgeRel"
)
# Subset to the selected columns and drop rows with any missing values.
# all_of() replaces the superseded one_of(): it errors (rather than warns)
# when a named column is absent, so typos in `selections` fail fast.
hr_subset_tbl <- hr_data_tbl %>%
select(all_of(selections)) %>%
drop_na()
# Confirm the resulting shape and column types.
hr_subset_tbl %>% glimpse()
## Rows: 310
## Columns: 6
## $ Employee_Name <chr> "Brown, Mia", "LaRotonda, William", "Steans, Tyrone", "…
## $ Sex <chr> "F", "M", "M", "F", "F", "F", "F", "F", "F", "M", "M", …
## $ MaritalDesc <chr> "Married", "Divorced", "Single", "Married", "Single", "…
## $ PayRate <dbl> 28.50, 23.00, 29.00, 21.50, 16.56, 20.50, 55.00, 55.00,…
## $ Department <chr> "Admin Offices", "Admin Offices", "Admin Offices", "Adm…
## $ AgeRel <dbl> 18.78713210, 15.20876112, 17.55783710, 16.59958932, 19.…
# Confirm no missingness remains after drop_na().
hr_subset_tbl %>% plot_missing()
# Preprocessing pipeline via recipes.
library(recipes)
# "~ ." = no outcome variable (unsupervised); use every column.
rec_obj <- recipe(~ ., hr_subset_tbl) %>%
# Drop the identifier from the model matrix; it is kept separately for labels.
step_rm(Employee_Name) %>%
# Center/scale numerics so distance-based clustering is not dominated
# by PayRate's larger scale.
step_normalize(all_numeric()) %>%
# One-hot (dummy) encode the categorical columns.
step_dummy(all_nominal()) %>%
prep()
# Print the trained recipe summary.
rec_obj
## Data Recipe
##
## Inputs:
##
## role #variables
## predictor 6
##
## Training data contained 310 data points and no missing data.
##
## Operations:
##
## Variables removed Employee_Name [trained]
## Centering and scaling for PayRate, AgeRel [trained]
## Dummy variables from Sex, MaritalDesc, Department [trained]
# Apply the trained recipe to its own training data -> processed feature tibble.
hr_subset_processed_tbl <- juice(rec_obj)
hr_subset_processed_tbl %>% glimpse()
## Rows: 310
## Columns: 12
## $ PayRate <dbl> -0.1810242, -0.5385474, -0.1485221, -…
## $ AgeRel <dbl> -0.16653976, -0.29271412, -0.20988505…
## $ Sex_M <dbl> 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1…
## $ MaritalDesc_Married <dbl> 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1…
## $ MaritalDesc_Separated <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ MaritalDesc_Single <dbl> 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0…
## $ MaritalDesc_Widowed <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ Department_Executive.Office <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Department_IT.IS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Department_Production <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Department_Sales <dbl> 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1…
## $ Department_Software.Engineering <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
# AgeRel after normalization (now centered near 0).
hr_subset_processed_tbl %>%
ggplot(aes(AgeRel)) +
geom_histogram()
# Prep for Python
# scikit-learn expects a plain numeric matrix; keep the names separately
# so cluster labels can be re-attached to employees afterwards.
X <- as.matrix(hr_subset_processed_tbl)
employee_names <- hr_subset_tbl$Employee_Name
# Data Manipulation
# Inside the Python session, R objects are reachable through reticulate's
# `r` object: r.X is the matrix X built in R above.
import pandas as pd
import numpy as np
r.X
## array([[-0.18102419, -0.16653976, 0. , ..., 0. ,
## 0. , 0. ],
## [-0.53854743, -0.29271412, 1. , ..., 0. ,
## 0. , 0. ],
## [-0.14852207, -0.20988505, 1. , ..., 0. ,
## 0. , 0. ],
## ...,
## [ 1.46358272, -0.33538365, 1. , ..., 0. ,
## 0. , 0. ],
## [ 1.41157934, 2.57647355, 1. , ..., 0. ,
## 0. , 0. ],
## [ 1.55458864, -0.50374485, 0. , ..., 0. ,
## 0. , 0. ]])
pd.Series(r.employee_names)
## 0 Brown, Mia
## 1 LaRotonda, William
## 2 Steans, Tyrone
## 3 Howard, Estelle
## 4 Singh, Nan
## ...
## 305 Daniele, Ann
## 306 Lajiri, Jyoti
## 307 Semizoglou, Jeremiah
## 308 South, Joe
## 309 Warfield, Sarah
## Length: 310, dtype: object
# Machine Learning
# Affinity Propagation picks the number of clusters itself by selecting
# "exemplar" points — no k has to be specified up front.
from sklearn.cluster import AffinityPropagation
af = AffinityPropagation().fit(r.X)
af
## AffinityPropagation(affinity='euclidean', convergence_iter=15, copy=True,
## damping=0.5, max_iter=200, preference=None, verbose=False)
# Row indices of the exemplar (cluster-center) observations.
af.cluster_centers_indices_
## array([ 8, 15, 22, 27, 39, 42, 43, 48, 54, 82, 84, 90, 101,
## 106, 110, 144, 166, 176, 184, 249, 286, 289, 306])
# One cluster label per employee (row of X).
cluster_assignments_af = af.labels_
cluster_assignments_af
## array([13, 15, 16, 13, 11, 13, 8, 2, 0, 3, 1, 0, 8, 1, 3, 1, 1,
## 3, 3, 1, 3, 0, 2, 0, 3, 1, 1, 3, 0, 0, 3, 2, 2, 7,
## 20, 5, 5, 22, 4, 4, 6, 4, 5, 6, 7, 5, 5, 7, 7, 5, 5,
## 5, 6, 9, 8, 20, 20, 18, 22, 20, 6, 12, 6, 7, 5, 5, 7, 7,
## 20, 7, 7, 5, 22, 8, 2, 9, 9, 9, 9, 10, 10, 10, 9, 10, 10,
## 9, 10, 19, 11, 16, 11, 19, 14, 11, 13, 13, 15, 14, 11, 14, 13, 12,
## 19, 11, 13, 14, 13, 13, 15, 11, 14, 13, 18, 14, 15, 19, 18, 11, 13,
## 13, 16, 11, 16, 14, 11, 14, 13, 11, 16, 17, 17, 11, 17, 13, 11, 11,
## 13, 11, 17, 14, 13, 13, 17, 13, 15, 13, 12, 11, 16, 16, 13, 16, 14,
## 11, 17, 18, 16, 12, 14, 12, 12, 14, 13, 15, 18, 11, 16, 11, 11, 16,
## 16, 11, 14, 13, 13, 12, 17, 11, 14, 17, 16, 13, 16, 13, 18, 11, 13,
## 11, 17, 17, 16, 17, 15, 16, 15, 12, 11, 16, 13, 13, 19, 16, 13, 11,
## 16, 15, 16, 14, 11, 17, 16, 16, 14, 19, 14, 12, 13, 11, 16, 16, 13,
## 11, 13, 16, 11, 16, 17, 13, 13, 11, 13, 19, 11, 16, 13, 12, 14, 13,
## 17, 14, 17, 11, 12, 13, 11, 11, 11, 12, 19, 19, 18, 16, 11, 14, 16,
## 14, 14, 13, 17, 11, 13, 16, 13, 16, 17, 14, 11, 14, 16, 11, 17, 13,
## 17, 15, 11, 16, 11, 17, 13, 19, 15, 0, 0, 3, 4, 20, 20, 3, 3,
## 21, 21, 21, 21, 21, 21, 21, 21, 21, 12, 7, 7, 22, 4, 2, 4, 8,
## 22, 22, 2, 4])
# DBSCAN: density-based clustering. Points in low-density regions are
# labeled -1 ("noise") instead of being forced into a cluster.
from sklearn.cluster import DBSCAN
db = DBSCAN(min_samples=5).fit(r.X)
db
## DBSCAN(algorithm='auto', eps=0.5, leaf_size=30, metric='euclidean',
## metric_params=None, min_samples=5, n_jobs=None, p=None)
# One label per employee; -1 marks noise points.
cluster_assignments_db = db.labels_
cluster_assignments_db
## array([-1, -1, -1, -1, -1, -1, -1, -1, 0, -1, 1, -1, -1, 1, -1, 1, 1,
## -1, -1, 1, -1, 0, -1, 0, -1, 1, 1, -1, 0, 0, -1, -1, -1, 2,
## -1, 3, 3, -1, -1, -1, -1, -1, 3, -1, 2, -1, 3, 2, 2, 3, -1,
## -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 3, 2, -1,
## -1, -1, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
## -1, -1, -1, 4, 5, 4, -1, 6, 4, 7, 7, -1, 6, 4, 6, 7, 8,
## -1, 4, 7, 6, 7, 7, 10, 4, 6, 7, -1, 6, -1, -1, -1, 4, 7,
## 7, 5, 4, 5, 6, 4, 6, 7, 4, 5, -1, 9, 4, -1, 7, 4, 4,
## 7, 4, 9, 6, 7, 7, 9, 7, 10, 7, 8, 4, 5, 5, 7, 5, 6,
## 4, 9, -1, 5, 8, 6, -1, 8, 6, 7, -1, -1, 4, 5, 4, 4, 5,
## 5, 4, 6, 7, 7, 8, 9, 4, 6, -1, 5, 7, 5, 7, -1, 4, 7,
## 4, 9, 9, 5, 9, 10, 5, 10, -1, 4, 5, 7, 7, -1, 5, 7, 4,
## 5, 10, 5, 6, 4, 9, 5, 5, 6, -1, 6, 8, 7, 4, 5, 5, 7,
## 4, 7, 5, 4, 5, 9, 7, 7, 4, 7, -1, 4, 5, 7, -1, 6, 7,
## 9, 6, 9, 4, 8, 7, 4, 4, 4, -1, -1, -1, -1, 5, 4, 6, 5,
## 6, 6, 7, 9, 4, 7, 5, 7, 5, -1, 6, 4, 6, 5, 4, -1, 7,
## -1, -1, 4, 5, 4, -1, 7, -1, -1, 0, 0, -1, -1, -1, -1, -1, -1,
## -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
## -1, -1, -1, -1])
Next, we need a reduced 2-D representation of the original data so the cluster assignments can be visualized.
# t-SNE: non-linear projection of the 12-D feature matrix down to 2-D for
# plotting; random_state pins the otherwise stochastic embedding.
from sklearn.manifold import TSNE
X_embedded = TSNE(n_components=2, random_state=123).fit_transform(r.X)
pd.DataFrame(X_embedded)
## 0 1
## 0 2.663728 10.084321
## 1 6.798886 -10.097016
## 2 5.099703 -14.169149
## 3 3.235934 9.874836
## 4 27.622253 -7.657422
## .. ... ...
## 305 -20.879011 -0.229556
## 306 -4.612731 13.428928
## 307 -4.403336 13.283936
## 308 -22.674707 -0.892780
## 309 -8.842725 11.582272
##
## [310 rows x 2 columns]
# Affinity Propagation
# Back in R: objects from the Python session are reachable via py$.
py$cluster_assignments_af
## [1] 13 15 16 13 11 13 8 2 0 3 1 0 8 1 3 1 1 3 3 1 3 0 2 0 3
## [26] 1 1 3 0 0 3 2 2 7 20 5 5 22 4 4 6 4 5 6 7 5 5 7 7 5
## [51] 5 5 6 9 8 20 20 18 22 20 6 12 6 7 5 5 7 7 20 7 7 5 22 8 2
## [76] 9 9 9 9 10 10 10 9 10 10 9 10 19 11 16 11 19 14 11 13 13 15 14 11 14
## [101] 13 12 19 11 13 14 13 13 15 11 14 13 18 14 15 19 18 11 13 13 16 11 16 14 11
## [126] 14 13 11 16 17 17 11 17 13 11 11 13 11 17 14 13 13 17 13 15 13 12 11 16 16
## [151] 13 16 14 11 17 18 16 12 14 12 12 14 13 15 18 11 16 11 11 16 16 11 14 13 13
## [176] 12 17 11 14 17 16 13 16 13 18 11 13 11 17 17 16 17 15 16 15 12 11 16 13 13
## [201] 19 16 13 11 16 15 16 14 11 17 16 16 14 19 14 12 13 11 16 16 13 11 13 16 11
## [226] 16 17 13 13 11 13 19 11 16 13 12 14 13 17 14 17 11 12 13 11 11 11 12 19 19
## [251] 18 16 11 14 16 14 14 13 17 11 13 16 13 16 17 14 11 14 16 11 17 13 17 15 11
## [276] 16 11 17 13 19 15 0 0 3 4 20 20 3 3 21 21 21 21 21 21 21 21 21 12 7
## [301] 7 22 4 2 4 8 22 22 2 4
# DBSCAN
# Pull the DBSCAN labels into R (-1 = noise).
py$cluster_assignments_db
## [1] -1 -1 -1 -1 -1 -1 -1 -1 0 -1 1 -1 -1 1 -1 1 1 -1 -1 1 -1 0 -1 0 -1
## [26] 1 1 -1 0 0 -1 -1 -1 2 -1 3 3 -1 -1 -1 -1 -1 3 -1 2 -1 3 2 2 3
## [51] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 3 2 -1 -1 -1 2 -1 -1 -1 -1
## [76] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 4 5 4 -1 6 4 7 7 -1 6 4 6
## [101] 7 8 -1 4 7 6 7 7 10 4 6 7 -1 6 -1 -1 -1 4 7 7 5 4 5 6 4
## [126] 6 7 4 5 -1 9 4 -1 7 4 4 7 4 9 6 7 7 9 7 10 7 8 4 5 5
## [151] 7 5 6 4 9 -1 5 8 6 -1 8 6 7 -1 -1 4 5 4 4 5 5 4 6 7 7
## [176] 8 9 4 6 -1 5 7 5 7 -1 4 7 4 9 9 5 9 10 5 10 -1 4 5 7 7
## [201] -1 5 7 4 5 10 5 6 4 9 5 5 6 -1 6 8 7 4 5 5 7 4 7 5 4
## [226] 5 9 7 7 4 7 -1 4 5 7 -1 6 7 9 6 9 4 8 7 4 4 4 -1 -1 -1
## [251] -1 5 4 6 5 6 6 7 9 4 7 5 7 5 -1 6 4 6 5 4 -1 7 -1 -1 4
## [276] 5 4 -1 7 -1 -1 0 0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
## [301] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
# Convert the t-SNE coordinate matrix into a tibble (columns V1, V2).
X_embedded_tbl <- py$X_embedded %>% as_tibble()
X_embedded_tbl
## # A tibble: 310 x 2
## V1 V2
## <dbl> <dbl>
## 1 2.66 10.1
## 2 6.80 -10.1
## 3 5.10 -14.2
## 4 3.24 9.87
## 5 27.6 -7.66
## 6 3.28 9.86
## 7 -21.3 -0.0448
## 8 -22.2 -0.0107
## 9 -11.1 12.9
## 10 -7.94 15.1
## # … with 300 more rows
library(plotly)
library(tidyquant)
# Combine names, both cluster assignments, and the 2-D coordinates, then
# join back the full HR data to supply hover-text attributes later.
employee_clustering_tbl <- tibble(
Employee_Name = employee_names,
cluster_af = py$cluster_assignments_af,
cluster_db = py$cluster_assignments_db,
) %>%
bind_cols(X_embedded_tbl) %>%
# NOTE(review): joins on all shared columns (here only Employee_Name);
# an explicit by = "Employee_Name" would silence the join message.
left_join(hr_data_tbl)
employee_clustering_tbl
## # A tibble: 310 x 41
## Employee_Name cluster_af cluster_db V1 V2 EmpID MarriedID
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Brown, Mia 13 -1 2.66 10.1 1.10e9 1
## 2 LaRotonda, W… 15 -1 6.80 -10.1 1.11e9 0
## 3 Steans, Tyro… 16 -1 5.10 -14.2 1.30e9 0
## 4 Howard, Este… 13 -1 3.24 9.87 1.21e9 1
## 5 Singh, Nan 11 -1 27.6 -7.66 1.31e9 0
## 6 Smith, Leigh… 13 -1 3.28 9.86 7.11e8 1
## 7 Bunbury, Jes… 8 -1 -21.3 -0.0448 1.50e9 1
## 8 Carter, Mich… 2 -1 -22.2 -0.0107 1.40e9 0
## 9 Dietrich, Je… 0 0 -11.1 12.9 1.41e9 0
## 10 Digitale, Al… 3 -1 -7.94 15.1 1.31e9 1
## # … with 300 more rows, and 34 more variables: MaritalStatusID <dbl>,
## # GenderID <dbl>, EmpStatusID <dbl>, DeptID <dbl>, PerfScoreID <dbl>,
## # FromDiversityJobFairID <dbl>, PayRate <dbl>, Termd <dbl>, PositionID <dbl>,
## # Position <chr>, State <chr>, Zip <chr>, DOB <chr>, Sex <chr>,
## # MaritalDesc <chr>, CitizenDesc <chr>, HispanicLatino <chr>, RaceDesc <chr>,
## # DateofHire <chr>, DateofTermination <chr>, TermReason <chr>,
## # EmploymentStatus <chr>, Department <chr>, ManagerName <chr>,
## # ManagerID <dbl>, RecruitmentSource <chr>, PerformanceScore <chr>,
## # EngagementSurvey <dbl>, EmpSatisfaction <dbl>, SpecialProjectsCount <dbl>,
## # LastPerformanceReview_Date <chr>, DaysLateLast30 <dbl>, AgeRel <dbl>,
## # TenureRel <dbl>
# Attrition (termination) rate and cluster size per DBSCAN cluster,
# sorted from highest to lowest attrition.
attrition_rate_tbl <- employee_clustering_tbl %>%
select(cluster_db, Termd) %>%
group_by(cluster_db) %>%
summarise(
# Proportion of terminated employees in the cluster (i.e. mean of 0/1 Termd).
term_rate = sum(Termd) / length(Termd),
term_count = n()
) %>%
arrange(desc(term_rate))
attrition_rate_tbl
## # A tibble: 12 x 3
## cluster_db term_rate term_count
## <dbl> <dbl> <int>
## 1 10 0.8 5
## 2 9 0.769 13
## 3 8 0.571 7
## 4 7 0.45 40
## 5 6 0.375 24
## 6 3 0.333 6
## 7 5 0.312 32
## 8 4 0.293 41
## 9 -1 0.262 122
## 10 0 0.143 7
## 11 1 0.143 7
## 12 2 0 6
# Bar chart: cluster size on x, cluster on y (ordered by size), filled by
# attrition rate.
g <- attrition_rate_tbl %>%
mutate(cluster_db = as_factor(cluster_db) %>% fct_reorder(term_count)) %>%
ggplot(aes(term_count, cluster_db)) +
geom_col(aes(fill = term_rate)) +
theme_tq() +
labs(title = "Attrition Rate by Employee Cluster",
fill = "Attr. Rate", x = "Attrition Count", y = "Cluster Assignment")
# Make the ggplot interactive with plotly.
ggplotly(g)
# Build one multi-line hover-tooltip string per employee (str_glue keeps the
# literal line breaks), then keep only the columns the plot needs.
data_formatted <- employee_clustering_tbl %>%
left_join(attrition_rate_tbl) %>%
mutate(description = str_glue("{Employee_Name}
Position = {Position}
MaritalDesc = {MaritalDesc}
Sex = {Sex}
Race = {RaceDesc}
EmpStatusID = {EmpStatusID}
PayRate = {PayRate}
Terminated = {Termd}
Term Reason = {TermReason}
Cluster Term Rate: {scales::percent(term_rate)}
Cluster Term Count: {term_count}
")
) %>%
select(Employee_Name:V2, description, Termd,
term_rate, term_count)
# Interactive cluster map: t-SNE coordinates colored by DBSCAN cluster,
# point size = cluster attrition rate, tooltip = description string.
g <- data_formatted %>%
ggplot(aes(V1, V2, color = factor(cluster_db))) +
# `text` is not a ggplot2 aesthetic; plotly picks it up for hover tooltips.
geom_point(aes(text = description, size = term_rate), alpha = 0.5) +
scale_color_tq() +
theme_tq() +
# theme(legend.position = "none") +
labs(title = "Employee Cluster Assignments", color = "Cluster")
ggplotly(g)
# Bonus #1!!!
# source_python() runs Python scripts and exposes their function
# definitions as callable R functions via reticulate.
source_python("py/clustering.py")
source_python("py/tsne.py")
# Calls def cluster_dbscan()
cluster_dbscan(X)
## [1] -1 -1 -1 -1 -1 -1 -1 -1 0 -1 1 -1 -1 1 -1 1 1 -1 -1 1 -1 0 -1 0 -1
## [26] 1 1 -1 0 0 -1 -1 -1 2 -1 3 3 -1 -1 -1 -1 -1 3 -1 2 -1 3 2 2 3
## [51] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 3 2 -1 -1 -1 2 -1 -1 -1 -1
## [76] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 4 5 4 -1 6 4 7 7 -1 6 4 6
## [101] 7 8 -1 4 7 6 7 7 10 4 6 7 -1 6 -1 -1 -1 4 7 7 5 4 5 6 4
## [126] 6 7 4 5 -1 9 4 -1 7 4 4 7 4 9 6 7 7 9 7 10 7 8 4 5 5
## [151] 7 5 6 4 9 -1 5 8 6 -1 8 6 7 -1 -1 4 5 4 4 5 5 4 6 7 7
## [176] 8 9 4 6 -1 5 7 5 7 -1 4 7 4 9 9 5 9 10 5 10 -1 4 5 7 7
## [201] -1 5 7 4 5 10 5 6 4 9 5 5 6 -1 6 8 7 4 5 5 7 4 7 5 4
## [226] 5 9 7 7 4 7 -1 4 5 7 -1 6 7 9 6 9 4 8 7 4 4 4 -1 -1 -1
## [251] -1 5 4 6 5 6 6 7 9 4 7 5 7 5 -1 6 4 6 5 4 -1 7 -1 -1 4
## [276] 5 4 -1 7 -1 -1 0 0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
## [301] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
# calls def tsne_embedding()
# Returns the 2-D embedding as an R matrix; show just the first rows.
tsne_embedding(X) %>% head()
## [,1] [,2]
## [1,] 2.663728 10.084321
## [2,] 6.798886 -10.097016
## [3,] 5.099703 -14.169149
## [4,] 3.235934 9.874836
## [5,] 27.622253 -7.657422
## [6,] 3.280085 9.856876
# Bonus #2!!!
# Embed a screenshot of the companion Shiny app in the rendered document.
knitr::include_graphics("img/shiny_app.jpg")